import os
import re
import json
from utils import *
from retrieval_base import retrieval_base
from retrieval_benchmark import retrieval_benchmark_base

class Keyword_Retrieval(retrieval_base):
    """Keyword-based retrieval baseline.

    Picks a single file from the repository's build dictionary according to a
    fixed keyword priority and exposes it as a GitHub ``blob`` URL.
    """

    # Priority of each keyword in the build dictionary; a higher value wins.
    # All concrete build tools share the lowest rank, so among them (and among
    # any other equal ranks) the first key encountered in the dict is kept.
    _BUILD_TOOLS_RANKING = {
        'make': 0,
        'cmake': 0,
        'autoconf': 0,
        'ninja': 0,
        'readme': 1,
        'install': 2,
        'build': 3,  # We consider build to be the most important one
    }

    def __init__(self, repo_dir, repo_full_name, default_branch):
        """Collect the build dictionary and the base URL for a repository.

        Args:
            repo_dir: Path of the locally cloned repository.
            repo_full_name: GitHub ``owner/name`` identifier.
            default_branch: Branch name used when building blob URLs.
        """
        super().__init__(repo_dir, repo_full_name)
        # NOTE(review): presumably maps a keyword ('make', 'readme', ...) to a
        # list of repo-relative file paths -- confirm against utils.get_build_dict.
        self.build_tools_dict = get_build_dict(self.repo_dir)

        self.base_url = f'https://github.com/{self.repo_full_name}/blob/{default_branch}'

    def get_highest_ranked_file(self, file_dict):
        """Return the first file of the highest-ranked keyword in *file_dict*.

        Keys that are not in the ranking table, and keys whose file list is
        empty, are ignored. Ties keep the first key in iteration order.

        Args:
            file_dict: Mapping of keyword to a sequence of file paths.

        Returns:
            The selected file path, or ``""`` when nothing qualifies.
        """
        ranking = self._BUILD_TOOLS_RANKING
        candidates = [
            (ranking[key], files[0])
            for key, files in file_dict.items()
            # Skip empty lists so a ranked-but-empty key cannot raise IndexError
            # or shadow a lower-ranked key that does have files.
            if key in ranking and files
        ]
        if not candidates:
            return ""

        # max() returns the first maximal element, which preserves the
        # "first key wins on equal rank" behaviour.
        return max(candidates, key=lambda candidate: candidate[0])[1]

    def predict_target_link(self):
        """Select the target file and return its full blob URL.

        Stores the chosen path in ``self.target_link`` and the URL in
        ``self.target_url``. When no file qualifies the path is ``""`` and the
        URL is the base URL followed by ``/``.
        """
        self.target_link = self.get_highest_ranked_file(self.build_tools_dict)
        self.target_url = self.base_url + '/' + self.target_link
        return self.target_url

    def predict_retrieval_trajectory(self):
        """Return the predicted trajectory: a one-element list with the target URL.

        The target URL is computed on demand when ``predict_target_link`` has
        not been called yet, instead of failing with ``AttributeError``.
        """
        if getattr(self, 'target_url', None) is None:
            self.predict_target_link()
        return [self.target_url]


class Keyword_Retrieval_Benchmark(retrieval_benchmark_base):
    """Benchmark driver that evaluates ``Keyword_Retrieval`` on each repository."""

    def __init__(self, input_raw_data_path, output_benchmark_path, cloned_repos_dir, output_retrieval_results_file_path, pre_computed_benchmark_file_path=None, pre_computed_retrieval_results_path=None, **kwargs):
        """Forward the paths to the base benchmark with multiprocessing disabled.

        Extra keyword arguments are accepted and ignored. The positional order
        passed to the base class differs from this signature's order.
        """
        super().__init__(
            input_raw_data_path,
            output_benchmark_path,
            cloned_repos_dir,
            pre_computed_benchmark_file_path,
            output_retrieval_results_file_path,
            pre_computed_retrieval_results_path,
            multi_processing=False,
        )

    def evaluate_trajectory(self, index, predicted_trajectories):
        """Score a predicted trajectory and update the running totals.

        Args:
            index: Index of the repository in the benchmark.
            predicted_trajectories: Predicted trajectory (list of URLs).

        Returns:
            The coverage credited for this item: ``1.0`` on an exact match,
            otherwise the value from ``calculate_trajectory_coverage``.
        """
        ground_truth_trajectory = self.get_ground_truth_trajectory(index)

        if predicted_trajectories == ground_truth_trajectory:
            # An exact match counts as full coverage; return the same value
            # that is added to the running total (previously 0.0 was returned).
            self.trajectory_accuracy += 1
            coverage = 1.0
        else:
            coverage = self.calculate_trajectory_coverage(predicted_trajectories, ground_truth_trajectory)

        self.trajectory_length += len(predicted_trajectories)
        self.trajectory_coverage += coverage
        return coverage

    def evaluate_target_link(self, index, predicted_target_link):
        """Count a hit when the predicted link is among the ground-truth links.

        Returns:
            ``predicted_target_link`` unchanged.
        """
        ground_truth_target_link = self.get_ground_truth_target_link(index)  # Type, list
        if predicted_target_link in ground_truth_target_link:
            self.target_link_accuracy += 1
        return predicted_target_link

    def generate_single_retrieval_result(self, index):
        '''
        Given the index of the repo in the retrieval benchmark, generate the retrieval results and then evaluate them

        Returns a dict holding the repo name and directory together with the
        ground-truth and predicted trajectory and target link.
        '''
        benchmark_data = self.get_item(index)
        repo_dir = benchmark_data['repo_dir']

        # Take the last two path segments as "owner/name". Trailing slashes are
        # dropped first so ".../owner/name/" does not yield an empty repo name.
        url_parts = benchmark_data['repo_url'].rstrip('/').split('/')
        repo_full_name = f"{url_parts[-2]}/{url_parts[-1]}"

        default_branch = benchmark_data['default_branch']
        retrieval_class = Keyword_Retrieval(repo_dir, repo_full_name, default_branch)

        # The target link is predicted first; the trajectory is built from it.
        predicted_target_link = retrieval_class.predict_target_link()
        predicted_trajectories = retrieval_class.predict_retrieval_trajectory()

        self.evaluate_trajectory(index, predicted_trajectories)
        self.evaluate_target_link(index, predicted_target_link)

        retrieval_results = {
            "repo_name": benchmark_data['repo_name'],
            "repo_dir": benchmark_data['repo_dir'],
            "ground_truth_trajectory": benchmark_data['retrieval_trajectory'],
            "predicted_trajectory": predicted_trajectories,
            "ground_truth_target_link": benchmark_data['retrieval_target_link'],
            "predicted_target_link": predicted_target_link,
        }
        return retrieval_results

if __name__ == '__main__':
    # Manual scratch entry point: only sets a machine-specific path to a local
    # clone; the example calls below are disabled.
    repo_dir = '/mnt/midnight/steven_zhang/LLM_assisted_compilation/cloned_repos/bitcoin'
    # NOTE(review): Keyword_Retrieval.__init__ takes (repo_dir, repo_full_name,
    # default_branch), so the two-argument call below needs a branch name added
    # before it can be re-enabled.
    # retrieval = Keyword_Retrieval(repo_dir, 'bitcoin/bitcoin')
    # print(retrieval.build_tools_dict)
    # print(retrieval.predict_target_link())
    # print(retrieval.predict_retrieval_trajectory())
